# Scrapy settings for caomei project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://docs.scrapy.org/en/latest/topics/settings.html
#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html

BOT_NAME = "caomei"

SPIDER_MODULES = ["caomei.spiders"]
NEWSPIDER_MODULE = "caomei.spiders"


# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = "caomei (+http://www.yourdomain.com)"

USER_AGENTS = [
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
    "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
    "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
    "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
    "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
    "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
]

PROXY_LIST = {
    'http': ['http://183.207.95.27:80', 'http://111.6.100.99:80', 'http://122.72.99.103:80',
           'http://106.46.132.2:80', 'http://112.16.4.99:81', 'http://123.58.166.113:9000',
           'http://118.178.124.33:3128', 'http://116.62.11.138:3128', 'http://121.42.176.133:3128',
           'http://111.13.2.131:80', 'http://111.13.7.117:80', 'http://121.248.112.20:3128',
           'http://112.5.56.108:3128', 'http://42.51.26.79:3128', 'http://183.232.65.201:3128',
           'http://118.190.14.150:3128', 'http://123.57.221.41:3128', 'http://183.232.65.203:3128',
           'http://166.111.77.32:3128', 'http://42.202.130.246:3128', 'http://122.228.25.97:8101',
           'http://61.136.163.245:3128', 'http://121.40.23.227:3128', 'http://123.96.6.216:808',
           'http://59.61.72.202:8080', 'http://114.141.166.242:80', 'http://61.136.163.246:3128',
           'http://60.31.239.166:3128', 'http://114.55.31.115:3128', 'http://202.85.213.220:3128'],
    'https': ['https://183.207.95.27:80', 'https://111.6.100.99:80', 'https://122.72.99.103:80',
           'https://106.46.132.2:80', 'https://112.16.4.99:81', 'https://123.58.166.113:9000',
           'https://118.178.124.33:3128', 'https://116.62.11.138:3128', 'https://121.42.176.133:3128',
           'https://111.13.2.131:80', 'https://111.13.7.117:80', 'https://121.248.112.20:3128',
           'https://112.5.56.108:3128', 'https://42.51.26.79:3128', 'https://183.232.65.201:3128',
           'https://118.190.14.150:3128', 'https://123.57.221.41:3128', 'https://183.232.65.203:3128',
           'https://166.111.77.32:3128', 'https://42.202.130.246:3128', 'https://122.228.25.97:8101',
           'https://61.136.163.245:3128', 'https://121.40.23.227:3128', 'https://123.96.6.216:808',
           'https://59.61.72.202:8080', 'https://114.141.166.242:80', 'https://61.136.163.246:3128',
           'https://60.31.239.166:3128', 'https://114.55.31.115:3128', 'https://202.85.213.220:3128']
}

# Obey robots.txt rules
ROBOTSTXT_OBEY = False

# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32

# 降低log级别
LOG_LEVEL = 'INFO'

# 保存图片路径
IMAGES_STORE = 'D:\\file\\my'

# 需要下载的图片url字段名 item中定义的名字
IMAGE_URLS_FILED = "img_link"

# 增加之后解决运行不报错，但是下载不了图片的问题
MEDIA_ALLOW_REDIRECTS = True

# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
# 禁止cookie
# 如果不是真的需要 cookie，则在 scrapy 爬取数据的时候可以 禁止 cookie 从而减少 CPU 的使用率，提升爬虫效率
COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
#    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
#    "Accept-Language": "en",
#}

# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
#    "caomei.middlewares.CaomeiSpiderMiddleware": 543,
#}

# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
DOWNLOADER_MIDDLEWARES = {
   'caomei.middlewares.SeleniumMiddleware': 543,  # 让配置的浏览器驱动生效
   'caomei.middlewares.UserAgent': 290,  # 随机User-Agent
   'caomei.middlewares.RandomProxy': 291,  # 随机代理
}

# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    "scrapy.extensions.telnet.TelnetConsole": None,
#}

# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
   # "caomei.pipelines.CaomeiPipeline": 300,
   # "caomei.pipelines.DownLoadImgPipeline": 300,
   # "caomei.pipelines.DownLoadPipeline": 300,
   "caomei.pipelines.DBPipeline": 300,
}

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = "httpcache"
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage"

# Set settings whose default value is deprecated to a future-proof value
REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"
TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
FEED_EXPORT_ENCODING = "utf-8"
